"""
Collection-specific RAG Search Engine

Provides semantic search within a specific document collection using RAG.
"""

from typing import List, Dict, Any, Optional
from loguru import logger

from .search_engine_library import LibraryRAGSearchEngine
from ...research_library.services.library_rag_service import LibraryRAGService
from ...database.models.library import RAGIndex, Document
from ...research_library.services.pdf_storage_manager import PDFStorageManager
from ...database.session_context import get_user_db_session
from ...config.thread_settings import get_setting_from_snapshot


class CollectionSearchEngine(LibraryRAGSearchEngine):
    """
    Search engine for a specific document collection using RAG.
    Directly searches only the specified collection's FAISS index.
    Each collection uses its own embedding model that was used during indexing.
    """

    # Mark as local RAG engine
    is_local = True

    def __init__(
        self,
        collection_id: str,
        collection_name: str,
        llm: Optional[Any] = None,
        max_filtered_results: Optional[int] = None,
        max_results: int = 10,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the collection-specific search engine.

        Args:
            collection_id: UUID of the collection to search within
            collection_name: Name of the collection for display
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
            max_results: Maximum number of search results
            settings_snapshot: Settings snapshot from thread context
            **kwargs: Additional engine-specific parameters
        """
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            settings_snapshot=settings_snapshot,
            **kwargs,
        )
        self.collection_id = collection_id
        self.collection_name = collection_name
        self.collection_key = f"collection_{collection_id}"

        # Load collection-specific embedding settings
        self._load_collection_embedding_settings()

    def _load_collection_embedding_settings(self):
        """
        Load embedding settings from the collection's RAG index.
        Uses the same embedding model that was used during indexing.
        """
        if not self.username:
            logger.warning("Cannot load collection settings without username")
            return

        try:
            with get_user_db_session(self.username) as db_session:
                # Get RAG index for this collection
                rag_index = (
                    db_session.query(RAGIndex)
                    .filter_by(
                        collection_name=self.collection_key,
                        is_current=True,
                    )
                    .first()
                )

                if not rag_index:
                    logger.warning(
                        f"No RAG index found for collection {self.collection_id}"
                    )
                    return

                # Use embedding settings from the RAG index
                self.embedding_model = rag_index.embedding_model
                self.embedding_provider = rag_index.embedding_model_type.value
                self.chunk_size = rag_index.chunk_size or self.chunk_size
                self.chunk_overlap = (
                    rag_index.chunk_overlap or self.chunk_overlap
                )

                logger.info(
                    f"Collection '{self.collection_name}' using embedding: "
                    f"{self.embedding_provider}/{self.embedding_model}"
                )

        except Exception:
            logger.exception(
                f"Error loading collection {self.collection_id} settings"
            )

    def search(
        self,
        query: str,
        limit: int = 10,
        llm_callback=None,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search within the specific collection using semantic search.

        Directly searches only this collection's FAISS index instead of
        searching all collections and filtering.

        Args:
            query: Search query
            limit: Maximum number of results to return
            llm_callback: Optional LLM callback for processing results
            extra_params: Additional search parameters

        Returns:
            List of search results from this collection
        """
        if not self.username:
            logger.error("Cannot search collection without username")
            return []

        try:
            # Get RAG index info for this collection
            with get_user_db_session(self.username) as db_session:
                rag_index = (
                    db_session.query(RAGIndex)
                    .filter_by(
                        collection_name=self.collection_key,
                        is_current=True,
                    )
                    .first()
                )

                if not rag_index:
                    logger.info(
                        f"No RAG index for collection '{self.collection_name}'"
                    )
                    return []

                # Get embedding settings from RAG index
                embedding_model = rag_index.embedding_model
                embedding_provider = rag_index.embedding_model_type.value
                chunk_size = rag_index.chunk_size or self.chunk_size
                chunk_overlap = rag_index.chunk_overlap or self.chunk_overlap

            # Create RAG service with collection's embedding settings
            rag_service = LibraryRAGService(
                username=self.username,
                embedding_model=embedding_model,
                embedding_provider=embedding_provider,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
            )

            # Check if there are indexed documents
            stats = rag_service.get_rag_stats(self.collection_id)
            if stats.get("indexed_documents", 0) == 0:
                logger.info(
                    f"No documents indexed in collection '{self.collection_name}'"
                )
                return []

            # Load and search the FAISS index for this collection
            vector_store = rag_service.load_or_create_faiss_index(
                self.collection_id
            )

            docs_with_scores = vector_store.similarity_search_with_score(
                query, k=limit
            )

            if not docs_with_scores:
                logger.info(
                    f"No results found in collection '{self.collection_name}'"
                )
                return []

            # Convert to search result format
            results = []
            for doc, score in docs_with_scores:
                metadata = doc.metadata or {}

                # Get document ID
                doc_id = metadata.get("source_id") or metadata.get(
                    "document_id"
                )

                # Get title
                title = (
                    metadata.get("document_title")
                    or metadata.get("title")
                    or (f"Document {doc_id}" if doc_id else "Untitled")
                )

                # Create snippet from content
                snippet = (
                    doc.page_content[:500] + "..."
                    if len(doc.page_content) > 500
                    else doc.page_content
                )

                # Generate document URL
                document_url = self._get_document_url(doc_id)

                # Add collection info to metadata
                metadata["collection_id"] = self.collection_id
                metadata["collection_name"] = self.collection_name

                result = {
                    "title": title,
                    "snippet": snippet,
                    "url": document_url,
                    "link": document_url,
                    "source": "library",
                    "relevance_score": float(1 / (1 + score)),
                    "metadata": metadata,
                }
                results.append(result)

            logger.info(
                f"Collection '{self.collection_name}' search returned "
                f"{len(results)} results for query: {query[:50]}..."
            )

            return results

        except Exception:
            logger.exception(
                f"Error searching collection '{self.collection_name}'"
            )
            return []

    def _get_document_url(self, doc_id: Optional[str]) -> str:
        """Get the URL for viewing a document."""
        if not doc_id:
            return "#"

        try:
            with get_user_db_session(self.username) as session:
                document = session.query(Document).filter_by(id=doc_id).first()
                if document:
                    # Use PDFStorageManager to check if PDF available
                    from pathlib import Path

                    library_root = get_setting_from_snapshot(
                        "research_library.storage_path",
                        self.settings_snapshot,
                        "~/.local-deep-research/library",
                    )
                    library_root = Path(library_root).expanduser()
                    pdf_manager = PDFStorageManager(library_root, "auto")
                    if pdf_manager.has_pdf(document, session):
                        return f"/library/document/{doc_id}/pdf"
                    return f"/library/document/{doc_id}/txt"
        except Exception as e:
            logger.warning(f"Error getting document URL for {doc_id}: {e}")

        return f"/library/document/{doc_id}/txt"
